import pandas as pd
from pathlib import Path
import altair as alt
# Use Altair's "html" renderer for displaying the charts below.
alt.renderers.enable("html")
RendererRegistry.enable('html')
# Data directory, resolved relative to the current working directory.
DATA_DIR = Path().resolve() / "../data/"
RESULTS_DIR = DATA_DIR / "legislatives2024/raw"

# Results workbook; per its file name it holds one row per candidate.
raw_data = pd.read_excel(
    RESULTS_DIR / "lg2024-resultats-circonscriptions-une-ligne-par-candidat2.xlsx"
)
raw_data.head()
| Departement | CodCirElec | LibCirElec | NbSap | NbSiePourvus | Inscrits | Abstentions | Votants | Blancs | Nuls | ... | NumPanneauCand | NomPsn | PrenomPsn | CivilitePsn | CodNuaCand | LibNuaCand | NbVoix | RapportExprimes | RapportInscrits | Elu | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01 | 0101 | 1ère circonscription | 1 | 0 | 86843 | 25013 | 61830 | 929 | 406 | ... | 1 | LAHY | Éric | M. | EXG | Extrême gauche | 419 | 0.69 | 0.48 | NON |
| 1 | 01 | 0101 | 1ère circonscription | 1 | 0 | 86843 | 25013 | 61830 | 929 | 406 | ... | 2 | MAÎTRE | Christophe | M. | RN | Rassemblement National | 23819 | 39.37 | 27.43 | QUALIF T2 |
| 2 | 01 | 0101 | 1ère circonscription | 1 | 0 | 86843 | 25013 | 61830 | 929 | 406 | ... | 3 | BRETON | Xavier | M. | LR | Les Républicains | 14495 | 23.96 | 16.69 | QUALIF T2 |
| 3 | 01 | 0101 | 1ère circonscription | 1 | 0 | 86843 | 25013 | 61830 | 929 | 406 | ... | 4 | GUERAUD | Sébastien | M. | UG | Union de la gauche | 14188 | 23.45 | 16.34 | QUALIF T2 |
| 4 | 01 | 0101 | 1ère circonscription | 1 | 0 | 86843 | 25013 | 61830 | 929 | 406 | ... | 5 | VINCENT | Cyril | M. | DSV | Droite souverainiste | 197 | 0.33 | 0.23 | NON |
5 rows × 29 columns
Liste des nuances politiques présentes dans le jeu de données (UXD = Union de l'Extrême Droite)
# Distinct political nuance codes found in the results.
raw_data["CodNuaCand"].unique()
array(['EXG', 'RN', 'LR', 'UG', 'DSV', 'ENS', 'EXD', 'DIV', 'ECO', 'DVD',
'REC', 'UXD', 'DVG', 'UDI', 'REG', 'DVC', 'HOR', 'COM', 'SOC',
'FI', 'VEC', 'RDG'], dtype=object)
Définition d'une colormap pour les futures visualisations
# Hex codes shared by several nuances, named by hue so that nuances drawn with
# the same color visibly stay in sync.
_NAVY = "#0b5394"
_LIME = "#8fce00"
_CREAM = "#ffe599"
_CRIMSON = "#cc0000"

# Nuance code -> hex color used by the charts. Key order is kept unchanged
# because the keys/values are reused, in order, as scale domain/range.
party_to_color = {
    "EXG": "#ac2929",
    "RN": _NAVY,
    "LR": "#0086ff",
    "UG": "#e06666",
    "DSV": _NAVY,
    "ENS": "#f1c232",
    "EXD": "#3470a7",
    "DIV": "#8e7cc3",
    "ECO": _LIME,
    "DVD": "#9fc5e8",
    "REC": _NAVY,
    "UXD": _NAVY,
    "DVG": "#ea9999",
    "UDI": _CREAM,
    "REG": "#a64d79",
    "DVC": _CREAM,
    "HOR": _CREAM,
    "COM": _CRIMSON,
    "SOC": "#f0b7b7",
    "FI": _CRIMSON,
    "VEC": _LIME,
    "RDG": _LIME,
}
Il y a 3 valeurs possibles pour la colonne "Elu" :
# Distinct values taken by the "Elu" column.
raw_data["Elu"].unique()
array(['NON', 'QUALIF T2', 'OUI'], dtype=object)
# Split candidates on the "Elu" column: "OUI" rows vs. "QUALIF T2" rows.
already_elected_candidates = raw_data.loc[raw_data["Elu"].eq("OUI")]
second_round_candidates = raw_data.loc[raw_data["Elu"].eq("QUALIF T2")]
Dans un premier temps nous pouvons identifier le nombre de candidats élus dès le premier tour.
# Number of already-elected candidates per political nuance.
first_round_seats = already_elected_candidates["CodNuaCand"].value_counts().reset_index()

# Restrict the color scale to the nuances that actually appear in the counts.
elected_nuances = first_round_seats["CodNuaCand"].unique()

alt.Chart(first_round_seats).mark_arc().encode(
    theta=alt.Theta("count:Q", title="Nombre de sièges"),
    color=alt.Color(
        "CodNuaCand:N",
        title="Nuance politique",
        scale=alt.Scale(
            domain=elected_nuances,
            range=[party_to_color[nuance] for nuance in elected_nuances],
        ),
    ),
    tooltip=["CodNuaCand", "count"],
).properties(title="Répartition des sièges obtenus au 1er tour")
Dans combien de circonscriptions le RN est-il en tête ?
# Leading nuance per constituency among the candidates qualified for round two.
# Rows are sorted by constituency then votes (both descending), so the first
# row kept for each constituency is its top-scoring candidate. Dropping the
# later duplicates avoids re-looking up CodCirElec in raw_data through the row
# labels returned by groupby().nth(0).
leading_party_by_circo = (
    second_round_candidates.sort_values(["CodCirElec", "NbVoix"], ascending=False)
    .drop_duplicates(subset="CodCirElec", keep="first")
    .set_index("CodCirElec")[["CodNuaCand"]]
)

# Number of constituencies led by each nuance.
leads_per_party = leading_party_by_circo.value_counts().reset_index()
# Single stacked bar: one segment per leading nuance, largest counts first.
alt.Chart(leads_per_party).mark_bar().encode(
    x=alt.X("count:Q"),
    color=alt.Color(
        "CodNuaCand:N",
        title="Nuance politique",
        legend=None,
        scale=alt.Scale(
            domain=list(party_to_color),
            range=list(party_to_color.values()),
        ),
    ),
    order=alt.Order("count:Q", sort="descending"),
    tooltip=["CodNuaCand", "count"],
).properties(title="Partis politiques en tête", width=400)
Il est ensuite possible de visualiser toutes les configurations restantes pour le 2nd tour (duels, triangulaires, quadrangulaires)
# Qualified candidates grouped per constituency; inside each group the rows
# keep the descending-votes order produced by the sort.
grouped_results = (
    raw_data.loc[raw_data["Elu"].eq("QUALIF T2")]
    .sort_values(by=["CodCirElec", "NbVoix"], ascending=False)
    .groupby(by="CodCirElec", group_keys=False)
)

# One list of nuance codes per constituency (the round-two configuration).
all_configurations = grouped_results["CodNuaCand"].apply(list)
all_configurations
CodCirElec
0101 [RN, LR, UG]
0102 [RN, ENS, UG]
0103 [ENS, RN, UG]
0104 [RN, ENS, UG]
0105 [UXD, UG]
...
ZZ07 [ENS, UG]
ZZ08 [LR, ENS]
ZZ09 [UG, ENS]
ZZ10 [UG, ENS]
ZZ11 [ENS, UG]
Name: CodNuaCand, Length: 501, dtype: object
# Count how many constituencies share each exact configuration. The order of
# nuances inside a list matters: [RN, ENS, UG] and [RN, UG, ENS] are counted
# separately.
count_configurations = all_configurations.value_counts()
# Show the 20 most frequent configurations.
count_configurations.head(20)
CodNuaCand [RN, ENS, UG] 51 [RN, UG, ENS] 48 [RN, UG] 36 [UG, ENS, RN] 29 [UG, RN, ENS] 24 [RN, ENS] 24 [UG, RN] 23 [ENS, UG, RN] 21 [ENS, RN, UG] 16 [RN, LR] 16 [UG, ENS] 15 [RN, LR, UG] 12 [UXD, ENS, UG] 12 [ENS, UG] 11 [UXD, UG, ENS] 10 [LR, RN, UG] 9 [DVG, RN] 7 [RN, DVD] 6 [ENS, UXD, UG] 6 [DVD, RN] 5 Name: count, dtype: int64
# Show only the 5 most frequent configurations.
count_configurations.head(5)
CodNuaCand [RN, ENS, UG] 51 [RN, UG, ENS] 48 [RN, UG] 36 [UG, ENS, RN] 29 [UG, RN, ENS] 24 Name: count, dtype: int64
Étudions maintenant les grandes tendances politiques. Pour simplifier la modélisation, nous classons les partis politiques en 5 grandes tendances :
import json
# Load the mapping applied to the nuance codes (CodNuaCand) to obtain each
# candidate's political group. The file is decoded explicitly as UTF-8 so the
# result does not depend on the platform's default text encoding.
with open(
    DATA_DIR.joinpath("legislatives2024/party_to_political_trend.json"),
    "r",
    encoding="utf-8",
) as f:
    party_to_political_groups = json.load(f)
# Political group -> hex color used by the charts (order reused as scale
# domain/range).
group_to_color = {
    "RN+": "#0b5394",
    "DIV": "#8e7cc3",
    "LR+": "#0086ff",
    "NFP+": "#e06666",
    "ENS+": "#f1c232",
}

# Tag every candidate row with its political group.
raw_data["GroupPol"] = raw_data["CodNuaCand"].map(party_to_political_groups)

# Sum of RapportExprimes per (constituency, political group).
scores_by_group = raw_data.groupby(["CodCirElec", "GroupPol"], as_index=False)[
    "RapportExprimes"
].sum()
scores_by_group
| CodCirElec | GroupPol | RapportExprimes | |
|---|---|---|---|
| 0 | 0101 | ENS+ | 11.68 |
| 1 | 0101 | LR+ | 23.96 |
| 2 | 0101 | NFP+ | 24.14 |
| 3 | 0101 | RN+ | 40.22 |
| 4 | 0102 | ENS+ | 24.21 |
| ... | ... | ... | ... |
| 2304 | ZZ10 | RN+ | 19.51 |
| 2305 | ZZ11 | DIV | 2.18 |
| 2306 | ZZ11 | ENS+ | 39.94 |
| 2307 | ZZ11 | NFP+ | 33.44 |
| 2308 | ZZ11 | RN+ | 24.43 |
2309 rows × 3 columns
# For each constituency keep the group with the highest summed score, then
# count how many constituencies each group leads.
scores_by_group = (
    scores_by_group.sort_values(by=["CodCirElec", "RapportExprimes"], ascending=False)
    .groupby("CodCirElec")["GroupPol"]
    .head(1)
    .value_counts()
    .reset_index()
)

alt.Chart(scores_by_group).mark_arc().encode(
    theta=alt.Theta("count:Q"),
    color=alt.Color(
        "GroupPol:N",
        title="Ensemble politique",
        scale=alt.Scale(
            domain=list(group_to_color),
            range=list(group_to_color.values()),
        ),
    ),
    tooltip=["count:Q"],
).properties(title="Rapports de force 1er tour par ensemble politique")
# Round-two candidate list (semicolon-separated CSV).
candidate_names_second_round_after = pd.read_csv(
    RESULTS_DIR
    / "legislatives-2024-candidatures-france-entiere-tour-2-2024-07-03-15h37.csv",
    sep=";",
)
candidate_names_second_round_after.head(2)
| Code département | Département | Code circonscription | Libellé circonscription | Numéro de panneau | N° dépôt | Sexe du candidat | Nom du candidat | Prénom du candidat | Date de naissance du candidat | Code nuance | Profession | Sortant | Sexe remplaçant | Nom remplaçant | Prénom remplaçant | Date de naissance remplaçant | Sortant remplaçant | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Ain | 101 | 1ère circonscription | 2 | 4 | M | MAÎTRE | Christophe | 22/04/1969 | RN | (22) - Commerçant et assimilé | NaN | M | MOUREY | Jean-Marie | 03/01/1965 | NaN |
| 1 | 1 | Ain | 101 | 1ère circonscription | 3 | 3 | M | BRETON | Xavier | 25/11/1962 | LR | (33) - Cadre de la fonction publique | OUI | M | FLOCHON | Jean-Yves | 15/08/1958 | NaN |
# Keep only the columns needed to match candidates and rename them to the
# column names used in raw_data.
candidate_names_second_round_after = candidate_names_second_round_after[
    ["Code circonscription", "Nom du candidat", "Prénom du candidat", "Code nuance"]
].rename(
    columns={
        "Code circonscription": "CodCirElec",
        "Nom du candidat": "NomPsn",
        "Prénom du candidat": "PrenomPsn",
        "Code nuance": "CodNuaCand",
    }
)

# Manual corrections: (surname to match, column to overwrite, corrected value).
# NOTE(review): presumably these align spellings with raw_data — confirm.
_manual_fixes = (
    ("VÉZIÈS", "NomPsn", "VEZIES"),
    ("JEANDENAND", "PrenomPsn", "Florianne"),
    ("FOSSEY", "PrenomPsn", "Veronique"),
    ("CLEMENT", "NomPsn", "CLÉMENT"),
)
for _surname, _column, _fixed_value in _manual_fixes:
    candidate_names_second_round_after.loc[
        candidate_names_second_round_after["NomPsn"] == _surname, _column
    ] = _fixed_value
# Flag, in raw_data, every candidate whose (NomPsn, PrenomPsn) pair appears in
# the round-two candidate list.
# NOTE(review): matching uses surname + first name only, not the constituency;
# confirm that homonyms cannot produce false positives.
indices = pd.MultiIndex.from_frame(
candidate_names_second_round_after[["NomPsn", "PrenomPsn"]]
)
# Temporarily index raw_data by the same pair so that .loc can select by name.
raw_data = raw_data.set_index(["NomPsn", "PrenomPsn"])
raw_data["valid_round_two"] = False
raw_data.loc[indices, "valid_round_two"] = True
# Turn NomPsn / PrenomPsn back into regular columns.
raw_data = raw_data.reset_index()
# Compare the number of qualified candidates with the number flagged as running.
print(
f"Avant désistements: {raw_data[raw_data['Elu'] == 'QUALIF T2'].shape[0]}, après: {raw_data['valid_round_two'].sum()}"
)
Avant désistements: 1318, après: 1095
# Persist the enriched table (with GroupPol and valid_round_two) for later use.
raw_data.to_csv(DATA_DIR / "legislatives2024/computed/data.csv")
# Qualified candidates per nuance; opacity separates those flagged as still
# running in round two from the others.
alt.Chart(raw_data.loc[raw_data["Elu"].eq("QUALIF T2")]).mark_bar().encode(
    x=alt.X("CodNuaCand:N", title="Nuance politique", sort="-y"),
    y=alt.Y("count():Q", title="Nombre de candidats"),
    color=alt.Color(
        "CodNuaCand:N",
        title="Nuance politique",
        scale=alt.Scale(
            domain=list(party_to_color),
            range=list(party_to_color.values()),
        ),
    ),
    opacity=alt.Opacity("valid_round_two:N", title="En place au 2nd tour"),
    tooltip=["count():Q"],
).properties(title="Visualisation des désistements par parti")
# Zoom on constituency "0101": votes per political group, split by whether the
# candidate is flagged as present in round two.
focus_circo_0101 = raw_data.loc[raw_data["CodCirElec"].eq("0101")]

alt.Chart(focus_circo_0101).mark_bar().encode(
    x=alt.X("NbVoix:Q", title="Nombre de voix au premier tour"),
    y=alt.Y("valid_round_two:N", title="Present au 2nd tour"),
    color=alt.Color(
        "GroupPol:N",
        scale=alt.Scale(
            domain=list(group_to_color),
            range=list(group_to_color.values()),
        ),
    ),
).properties(title="Rapports de force (circonscription 0101)")
import geopandas as gpd
# Constituency outlines; the id column is renamed to the key used in raw_data.
contours_circos = gpd.read_file(
    DATA_DIR / "insee/circonscriptions_legislatives_030522.shp"
).rename(columns={"id_circo": "CodCirElec"})

# Department codes longer than two characters are flagged as overseas.
contours_circos["is_overseas"] = contours_circos["dep"].str.len().gt(2)
contours_circos.head()
| CodCirElec | dep | libelle | geometry | is_overseas | |
|---|---|---|---|---|---|
| 0 | 97302 | 973 | Guyane - 2e circonscription | MULTIPOLYGON (((-54.60236 2.33356, -54.60242 2... | True |
| 1 | 97301 | 973 | Guyane - 1re circonscription | MULTIPOLYGON (((-51.93691 4.46648, -51.93667 4... | True |
| 2 | 97201 | 972 | Martinique - 1re circonscription | MULTIPOLYGON (((-61.00495 14.57791, -61.00492 ... | True |
| 3 | 97202 | 972 | Martinique - 2e circonscription | MULTIPOLYGON (((-61.12889 14.63162, -61.1289 1... | True |
| 4 | 97203 | 972 | Martinique - 3e circonscription | POLYGON ((-61.03945 14.64265, -61.0394 14.6425... | True |
# Display the leading nuance per constituency before joining it to the map.
leading_party_by_circo
| CodNuaCand | |
|---|---|
| CodCirElec | |
| ZZ11 | ENS |
| ZZ10 | UG |
| ZZ09 | UG |
| ZZ08 | LR |
| ZZ07 | ENS |
| ... | ... |
| 0105 | UXD |
| 0104 | RN |
| 0103 | ENS |
| 0102 | RN |
| 0101 | RN |
501 rows × 1 columns
# Attach the leading nuance to each outline; constituencies absent from
# leading_party_by_circo get the placeholder "1er tour".
contours_circos["leading_party"] = (
    contours_circos["CodCirElec"]
    .map(leading_party_by_circo["CodNuaCand"])
    .fillna("1er tour")
)
Pour faciliter la visualisation, seule la France hexagonale est représentée.
# Click selection keyed on the constituency code. It drives both the map
# highlight and the filtering of the detail chart. `selection_point` and
# `add_params` replace the deprecated `selection_single` / `add_selection`
# and take the same arguments.
single_selector = alt.selection_point(fields=["CodCirElec"], on="click", clear=True)

# Map of non-overseas constituencies, colored by leading nuance; rows tagged
# "1er tour" are drawn in grey.
circo_map = (
    alt.Chart(contours_circos[~contours_circos["is_overseas"]])
    .mark_geoshape()
    .encode(
        tooltip=["CodCirElec:N", "leading_party:N"],
        opacity=alt.condition(single_selector, alt.value(1), alt.value(0.6)),
        color=alt.Color("leading_party:N", legend=None).scale(
            domain=list(party_to_color.keys()) + ["1er tour"],
            range=list(party_to_color.values()) + ["grey"],
        ),
    )
    .add_params(single_selector)
    .properties(
        title="Vue France entière (2nd tour, circonscriptions acquises en gris)",
        width=500,
    )
)

# Detail chart: first-round votes of the candidates in the selected constituency.
circo_detail = (
    alt.Chart(raw_data)
    .mark_bar()
    .encode(
        x=alt.X("NbVoix:Q").title("Nombre de voix au premier tour"),
        color=alt.Color("CodNuaCand:N").scale(
            domain=list(party_to_color.keys()),
            range=list(party_to_color.values()),
        ),
        y=alt.Y("valid_round_two:N").title("Present au 2nd tour"),
        tooltip=["GroupPol:N", "NbVoix:Q"],
    )
    .add_params(single_selector)
    .transform_filter(single_selector)
    .properties(title="Rapports de force pour la circonscription", width=500)
)

# Stack the map above the detail chart.
circo_map & circo_detail
/home/victor/Data/projets_perso/projet_elections/.venv/lib/python3.10/site-packages/altair/utils/deprecation.py:65: AltairDeprecationWarning: 'selection_single' is deprecated. Use 'selection_point' warnings.warn(message, AltairDeprecationWarning, stacklevel=1) /home/victor/Data/projets_perso/projet_elections/.venv/lib/python3.10/site-packages/altair/utils/deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead. warnings.warn(message, AltairDeprecationWarning, stacklevel=1)